Linux: upgrade to 2.6.16.13.

author cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>

Thu, 4 May 2006 16:38:25 +0000 (17:38 +0100)

committer cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>

Thu, 4 May 2006 16:38:25 +0000 (17:38 +0100)
author cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
Thu, 4 May 2006 16:38:25 +0000 (17:38 +0100)
committer cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
Thu, 4 May 2006 16:38:25 +0000 (17:38 +0100)
diff --git a/buildconfigs/mk.linux-2.6-xen b/buildconfigs/mk.linux-2.6-xen

index d784d175e0f0c8190c56fc2e7fe215afa9d62262..04070337f1ac9d18411bc89b2a6821badcf0ed14 100644 (file)
--- a/buildconfigs/mk.linux-2.6-xen
+++ b/buildconfigs/mk.linux-2.6-xen
@@ -1,5 +1,5 @@
  LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.16
+LINUX_VER    = 2.6.16.13
  
  EXTRAVERSION ?= xen
  
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c

index da2d48e1784541690f25ef095e46b7931b05702d..c1e240926d92b3009383d6b9eb193e72671bf12c 100644 (file)
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
@@ -43,6 +43,7 @@
  #include <linux/smp_lock.h>
  #include <linux/highmem.h>
  #include <linux/ptrace.h>
+#include <linux/audit.h>
  
  #include <asm/uaccess.h>
  #include <asm/io.h>
@@ -258,6 +259,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
  #ifndef CONFIG_X86_NO_TSS
         struct tss_struct *tss;
  #endif
+       long eax;
  /*
   * make sure the vm86() system call doesn't try to do anything silly
   */
@@ -313,13 +315,19 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
         tsk->thread.screen_bitmap = info->screen_bitmap;
         if (info->flags & VM86_SCREEN_BITMAP)
                 mark_screen_rdonly(tsk->mm);
+       __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
+       __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
+
+       /* call audit_syscall_exit() since we do not exit via the normal paths */
+       if (unlikely(current->audit_context))
+               audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+
         __asm__ __volatile__(
-               "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
                 "movl %0,%%esp\n\t"
                 "movl %1,%%ebp\n\t"
                 "jmp resume_userspace"
                 : /* no outputs */
-               :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
+               :"r" (&info->regs), "r" (task_thread_info(tsk)));
         /* we never return here */
  }
  
diff --git a/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile b/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile

index 6dcd7cf5ac93dc669a58c50887398afb9a70bba4..a84151e4ca82365c1b7dad1bd74b891c7f816a83 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile
@@ -28,11 +28,11 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
  $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
         $(call if_changed,syscall)
  
-AFLAGS_vsyscall-sysenter.o = -m32 -Iarch/i386/kernel
-AFLAGS_vsyscall-syscall.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
+AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
  
  ifdef CONFIG_XEN
-AFLAGS_vsyscall-int80.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
  CFLAGS_syscall32-xen.o += -DUSE_INT80
  AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
  
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S

index 2930d58da273d560687121273a70ea495d9e6d18..05e7ce0b7eae67e9155d0b201fad0e47cf2988d3 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
@@ -221,6 +221,10 @@ rff_trace:
   *
   * XXX if we had a free scratch register we could save the RSP into the stack frame
   *      and report it properly in ps. Unfortunately we haven't.
+ *
+ * When the user can change the frames, always force IRET, because
+ * it handles non-canonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
   */                                    
  
  ENTRY(system_call)
@@ -289,7 +293,10 @@ sysret_signal:
         xorl %esi,%esi # oldset -> arg2
         call ptregscall_common
  1:     movl $_TIF_NEED_RESCHED,%edi
-       jmp sysret_check
+       /* Use IRET because user could have changed frame. This
+          works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+       cli
+       jmp int_with_check
         
  badsys:
         movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -315,7 +322,8 @@ tracesys:
         call syscall_trace_leave
         RESTORE_TOP_OF_STACK %rbx
         RESTORE_REST
-       jmp ret_from_sys_call
+       /* Use IRET because user could have changed frame */
+       jmp int_ret_from_sys_call
         CFI_ENDPROC
                 
  /* 
@@ -449,25 +457,9 @@ ENTRY(stub_execve)
         CFI_ADJUST_CFA_OFFSET -8
         CFI_REGISTER rip, r11
         SAVE_REST
-       movq %r11, %r15
-       CFI_REGISTER rip, r15
         FIXUP_TOP_OF_STACK %r11
         call sys_execve
-       GET_THREAD_INFO(%rcx)
-       bt $TIF_IA32,threadinfo_flags(%rcx)
-       CFI_REMEMBER_STATE
-       jc exec_32bit
         RESTORE_TOP_OF_STACK %r11
-       movq %r15, %r11
-       CFI_REGISTER rip, r11
-       RESTORE_REST
-       pushq %r11
-       CFI_ADJUST_CFA_OFFSET 8
-       CFI_REL_OFFSET rip, 0
-       ret
-
-exec_32bit:
-       CFI_RESTORE_STATE
         movq %rax,RAX(%rsp)
         RESTORE_REST
         jmp int_ret_from_sys_call
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c

index c761d703edb837a5a68b4f169b2d422b3400a984..6e4e62e86c30f6df152fcca72132bafba0b9a5d3 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
@@ -484,6 +484,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
          * This is basically '__unlazy_fpu', except that we queue a
          * multicall to indicate FPU task switch, rather than
          * synchronously trapping to Xen.
+        * This must be here to ensure both math_state_restore() and
+        * kernel_fpu_begin() work consistently.
+        * The AMD workaround requires it to be after DS reload, or
+        * after DS has been cleared, which we do in __prepare_arch_switch.
          */
         if (prev_p->thread_info->status & TS_USEDFPU) {
                 __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c

index df9c1ddbf6fc8537aa9d1127f9619c807422e2b7..b6ed190a2d165015493da92a1878ca870611623b 100644 (file)
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
@@ -1157,6 +1157,10 @@ static int __init init_amd(struct cpuinfo_x86 *c)
         if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
                 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
  
+       /* Enable workaround for FXSAVE leak */
+       if (c->x86 >= 6)
+               set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
         r = get_model_name(c);
         if (!r) { 
                 switch (c->x86) { 
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c

index 752834fd138e4832254a611d94269c6d4480327b..f6f06897719686e80da39b0ef4f63524ceff1faf 100644 (file)
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -2708,7 +2708,11 @@ static void __do_SAK(void *arg)
                 }
                 task_lock(p);
                 if (p->files) {
-                       rcu_read_lock();
+                       /*
+                        * We don't take a ref to the file, so we must
+                        * hold ->file_lock instead.
+                        */
+                       spin_lock(&p->files->file_lock);
                         fdt = files_fdtable(p->files);
                         for (i=0; i < fdt->max_fds; i++) {
                                 filp = fcheck_files(p->files, i);
@@ -2723,7 +2727,7 @@ static void __do_SAK(void *arg)
                                         break;
                                 }
                         }
-                       rcu_read_unlock();
+                       spin_unlock(&p->files->file_lock);
                 }
                 task_unlock(p);
         } while_each_task_pid(session, PIDTYPE_SID, p);
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h

index d883e06607f98148dc61a0122bd2963f569925e2..00fd80db971f991aa6dc4f65e9e6fe724a66295a 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
@@ -33,6 +33,9 @@
  
  #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
  
+#define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
+
  #define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
  #define pte_same(a, b)         ((a).pte_low == (b).pte_low)
  #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h

index 9b2d6c547fc39f755b6c33017f5adf76d2757bbd..528cc0478f21f432dbe65e898240306bdee41828 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
@@ -107,6 +107,26 @@ static inline void pud_clear (pud_t * pud) { }
  #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
                         pmd_index(address))
  
+/*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a write
+ * barrier (smp_wmb()).
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+       ptep->pte_low = 0;
+       smp_wmb();
+       ptep->pte_high = 0;
+}
+
+static inline void pmd_clear(pmd_t *pmd)
+{
+       u32 *tmp = (u32 *)pmd;
+       *tmp = 0;
+       smp_wmb();
+       *(tmp + 1) = 0;
+}
+
  static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  {
         pte_t res;
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h

index 224c1032c7d8e623431ebb56a10b5f5bc0f12622..6bed6669c52693102af5b27570cdc21518ad0682 100644 (file)
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
@@ -205,14 +205,12 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
  extern unsigned long pg0[];
  
  #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
  
  /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
  #define pmd_none(x)    (!(unsigned long)pmd_val(x))
  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
     can temporarily clear it. */
  #define pmd_present(x) (pmd_val(x))
-#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
  #define pmd_bad(x)     ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
  
  
@@ -272,16 +270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
         pte_t pte;
         if (full) {
                 pte = *ptep;
-#ifdef CONFIG_X86_PAE
-               /* Cannot do this in a single step, as the compiler may
-                  issue the two stores in either order, but the hypervisor
-                  must not see the high part before the low one. */
-               ptep->pte_low = 0;
-               barrier();
-               ptep->pte_high = 0;
-#else
-               *ptep = __pte(0);
-#endif
+               pte_clear(mm, addr, ptep);
         } else {
                 pte = ptep_get_and_clear(mm, addr, ptep);
         }
diff --git a/linux-2.6-xen-sparse/include/linux/mm.h b/linux-2.6-xen-sparse/include/linux/mm.h

index 338f4823589ae4bab4d0eea3eeeea46155921551..778439edc7b7c6c2641f9fc3451a86d9942523c6 100644 (file)
--- a/linux-2.6-xen-sparse/include/linux/mm.h
+++ b/linux-2.6-xen-sparse/include/linux/mm.h
@@ -232,10 +232,9 @@ struct page {
                 unsigned long private;          /* Mapping-private opaque data:
                                                  * usually used for buffer_heads
                                                  * if PagePrivate set; used for
-                                                * swp_entry_t if PageSwapCache.
-                                                * When page is free, this
+                                                * swp_entry_t if PageSwapCache;
                                                  * indicates order in the buddy
-                                                * system.
+                                                * system if PG_buddy is set.
                                                  */
                 struct address_space *mapping;  /* If low bit clear, points to
                                                  * inode address_space, or NULL.
diff --git a/linux-2.6-xen-sparse/mm/page_alloc.c b/linux-2.6-xen-sparse/mm/page_alloc.c

index 3ef836209dd0eac84fb1403132f4f5a4f562e07f..c0f3c605371eb9b15b22e9937b7ce2cc7a78b847 100644 (file)
--- a/linux-2.6-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c
@@ -153,7 +153,8 @@ static void bad_page(struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback );
+                       1 << PG_writeback |
+                       1 << PG_buddy );
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
@@ -224,12 +225,12 @@ static inline unsigned long page_order(struct page *page) {
  
  static inline void set_page_order(struct page *page, int order) {
         set_page_private(page, order);
-       __SetPagePrivate(page);
+       __SetPageBuddy(page);
  }
  
  static inline void rmv_page_order(struct page *page)
  {
-       __ClearPagePrivate(page);
+       __ClearPageBuddy(page);
         set_page_private(page, 0);
  }
  
@@ -268,11 +269,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
   * This function checks whether a page is free && is the buddy
   * we can do coalesce a page and its buddy if
   * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
   *
+ * For recording page's order, we use page_private(page).
   */
  static inline int page_is_buddy(struct page *page, int order)
  {
@@ -281,10 +284,10 @@ static inline int page_is_buddy(struct page *page, int order)
                 return 0;
  #endif
  
-       if (PagePrivate(page)           &&
-           (page_order(page) == order) &&
-            page_count(page) == 0)
+       if (PageBuddy(page) && page_order(page) == order) {
+               BUG_ON(page_count(page) != 0);
                 return 1;
+       }
         return 0;
  }
  
@@ -301,7 +304,7 @@ static inline int page_is_buddy(struct page *page, int order)
   * as necessary, plus some accounting needed to play nicely with other
   * parts of the VM system.
   * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
   * order is recorded in page_private(page) field.
   * So when we are allocating or freeing one, we can derive the state of the
   * other.  That is, if we allocate a small block, and both were   
@@ -364,7 +367,8 @@ static inline int free_pages_check(struct page *page)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
@@ -523,7 +527,8 @@ static int prep_new_page(struct page *page, int order)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
  
         /*
diff --git a/linux-2.6-xen-sparse/net/core/dev.c b/linux-2.6-xen-sparse/net/core/dev.c

index 55870e12540e19187aed578a108ea14f193d54b5..87c770eedc45ed284b4026191fa9ded4cd51bcf5 100644 (file)
--- a/linux-2.6-xen-sparse/net/core/dev.c
+++ b/linux-2.6-xen-sparse/net/core/dev.c
@@ -2994,11 +2994,11 @@ void netdev_run_todo(void)
  
                 switch(dev->reg_state) {
                 case NETREG_REGISTERING:
+                       dev->reg_state = NETREG_REGISTERED;
                         err = netdev_register_sysfs(dev);
                         if (err)
                                 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
                                        dev->name, err);
-                       dev->reg_state = NETREG_REGISTERED;
                         break;
  
                 case NETREG_UNREGISTERING:
author	cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
	Thu, 4 May 2006 16:38:25 +0000 (17:38 +0100)
committer	cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
	Thu, 4 May 2006 16:38:25 +0000 (17:38 +0100)
buildconfigs/mk.linux-2.6-xen		patch \| blob \| history
linux-2.6-xen-sparse/arch/i386/kernel/vm86.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c		patch \| blob \| history
linux-2.6-xen-sparse/drivers/char/tty_io.c		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h		patch \| blob \| history
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h		patch \| blob \| history
linux-2.6-xen-sparse/include/linux/mm.h		patch \| blob \| history
linux-2.6-xen-sparse/mm/page_alloc.c		patch \| blob \| history
linux-2.6-xen-sparse/net/core/dev.c		patch \| blob \| history